#include <stdio.h>
#include <stdlib.h>
#include "SuffixTree.h"
#include "Bits.h"
#include <time.h>

#define EMPTY_BIT 2

extern FILE *outputFile;
extern char smallMergedFileNameprefix[];
char currOutputFileName[MAX_PATH_LENGTH];

//filled in reader
extern int *buffersLengths;  //the current lengths of the in-memory buffers
extern int *buffersPointers;    //from 0 to bufferslengths
extern Suffix  **buffers;   //buffers with suffix arrays chunks
extern int output_buffer_max;
extern int *totallengthsdoubled;
extern int numberOfChunks;

extern unsigned int masks_array32[32];
extern int numBitsInLong; //is 32 bits, we treat the input sequence as doubled in size
extern int shiftDivision; //is log lengthOfLong - in order to divide by lengthOfLong

//Variables for sorting heap
HeapNode *heap;
int counter=0;
HeapNode lastTransferred;
int maxHeapCapacity;

int processingCounter=0;
extern char smallBinaryFileNamePrefix[];
extern char smallMergedFileNameprefix[];
//HeapNode *prev; check

STNode *outputBuffer;
STNode *outputRoot;
int outputCounter=0;

int dividerCounter=0;
int outputFileNumberCounter=0;
extern FILE *dividersFile;
DividerElement currentDivider[1];
extern unsigned int **binaryString;
PathElement *lastPath;
int pathCounter;


int prevLCP;
int shiftDivision=5; //log 32
void printNode(STNode *node)
{
	printf("NODE startPos=%d, file=%d, length=%d, child[0]=%d, child[1]=%d,  nextleaf=%d\n",
		node->startPos,node->fileNumber,node->length,node->children[0],node->children[1],node->nextLeaf);
}

void printLastPath(HeapNode *last, int lcpwithnext)
{
	int i;
	unsigned int tbr_bitprefix=getBitsPrefix(last->fileNumber,last->startPos+lcpwithnext-1);
	printf("Last path after insertion of suffix startPos=%d file=%d of length %d\n",
		last->startPos,last->fileNumber,totallengthsdoubled[last->fileNumber]-last->startPos);
	printBitSequence(&tbr_bitprefix);
	for(i=0;i<pathCounter;i++)
	{
		printf("level %d node: \n",i );
		printNode(lastPath[i].node);
		printf(" distance from root is %d\n",lastPath[i].length);
	}
}

STNode *getNewOutputTreeNode(int *newID)
{
	STNode *ret=&outputBuffer[outputCounter];	
	(*ret).children[0]=0;
	(*ret).children[1]=0;
	(*ret).nextLeaf=0;
	*newID=outputCounter++;
	return ret;
}



unsigned int getBitsPrefix(int fileNumber,int startPos)
{
	//IMPORTANT - startPos in the doubled sequence of 0 and 1
	
	//printf("start pos=%d\n ",startPos);
	int longPos;
	int tmp;
	short startBitInLong;
	unsigned int bitSequence=0;
	unsigned int first=0;
	unsigned int second=0;
	

	if(startPos>=totallengthsdoubled[fileNumber])
	{
		printf("Invalid position in the binary encoded file\n");
		exit (1);
	}
	
	
	
	longPos=startPos>>shiftDivision;
	tmp=(longPos<<shiftDivision);
	startBitInLong=(short)(startPos-tmp);

	first=binaryString[fileNumber][ longPos];

	if(!startBitInLong)
		return first;

	first<<=startBitInLong;

	if(startPos+(numBitsInLong-startBitInLong)>=totallengthsdoubled[fileNumber])
		return first;
	
	bitSequence |=first;

	second=binaryString[fileNumber][ longPos+1];

	second>>=(numBitsInLong-startBitInLong);
	bitSequence |=second;

	
	return bitSequence;	

}

int getLCP(HeapNode *prev, HeapNode *next,short *firstbitafterlcp)
{
	int b;
	int iteration=1;
	
	unsigned int diff;

	unsigned int firstone=(unsigned int) (1 << (numBitsInLong-1));//number 1000000...
	
	int currLCP=0;

	int start1=(*prev).startPos;
	int start2=(*next).startPos;

	int file1=(*prev).fileNumber;
	int file2=(*next).fileNumber;

	int len1=totallengthsdoubled[file1]-start1;
	int len2=totallengthsdoubled[file2]-start2;	
	
	unsigned int bitPrefix1=(*prev).bitsPrefix1;
	unsigned int bitPrefix2=(*next).bitsPrefix1;	
	iteration++;

	diff=(bitPrefix1)^(bitPrefix2);

	while(diff==0)
	{
		if(len1<=numBitsInLong)
		{
			if(len2==len1)
				*firstbitafterlcp=EMPTY_BIT;
			else if(len2>len1)
			{
				if(len1==numBitsInLong)
				{
					start2+=numBitsInLong;
					bitPrefix2=getBitsPrefix(file2,start2);
					*firstbitafterlcp=((bitPrefix2 & masks_array32[numBitsInLong-0-1])!=0);
				}
				else
					*firstbitafterlcp=((bitPrefix2 & masks_array32[numBitsInLong-len1-1])!=0);
			}
			else
			{
				printBitSequence(&bitPrefix1);
				printBitSequence(&bitPrefix2);
				printf("LCP computation error 1\n");
				exit(1);
			}
			return (currLCP+len1);
		}

		currLCP+=numBitsInLong;

		len1-=numBitsInLong;
		len2-=numBitsInLong;

		start1+=numBitsInLong;
		start2+=numBitsInLong;

		if(iteration==2)
		{
			 bitPrefix1=(*prev).bitsPrefix2;
			 bitPrefix2=(*next).bitsPrefix2;
			 iteration++;
		}
		else
		{
			bitPrefix1=getBitsPrefix(file1,start1);
			bitPrefix2=getBitsPrefix(file2,start2);
		}

	

		diff=(bitPrefix1)^(bitPrefix2);
	}

//prev or shorter or is smaller - therefore should be 1 in next where 0 in prev
	for(b=0;diff!=0 && b<len1;diff=diff<<1,b++)
	{		
		//if it is still inside this loop, then LCP is in the middle of both bit strings
		if(diff &(firstone))
		{
			*firstbitafterlcp=((bitPrefix2 & masks_array32[numBitsInLong-b-1])!=0);
			return (currLCP+b);
		}			
	}
		
		
	//if it is here, then it exited the for loop since remaininglength1=0
	//b-1 was the last position where they were equal, after b bits, there is an empty string in 
	//the prev suffix, and there is some bit in the next suffix
	if(len1<numBitsInLong)
	{
		if(len2==len1)
			*firstbitafterlcp=EMPTY_BIT;
		else if(len2>len1)
			*firstbitafterlcp=((bitPrefix2 & masks_array32[numBitsInLong-b-1])!=0);
		else
		{
			printBitSequence(&bitPrefix1);
			printBitSequence(&bitPrefix2);
			printf("LCP computation error 2\n");
			exit(1);
		}
		return (currLCP+b);
	}
	printf("LCP computation error 3\n");
	exit(1);

	return -1;
}

//>0 -		first>second 
//<0 -	first<second
//0 -		equal 
int compare(HeapNode *first, HeapNode *second)
{

	int iteration=1;

	int start1=(*first).startPos;
	int start2=(*second).startPos;

	int file1=(*first).fileNumber;
	int file2=(*second).fileNumber;

	int len1=totallengthsdoubled[file1]-start1;
	int len2=totallengthsdoubled[file2]-start2;

	unsigned int bitPrefix1=(*first).bitsPrefix1;
	unsigned int bitPrefix2=(*second).bitsPrefix1;

	iteration++;
	
	if(file1==file2)
		return -1; //since they are sorted in the suffix array of the same file, first is smaller
	
	while(bitPrefix1==bitPrefix2)
	{
		if(len1<=numBitsInLong || len2<=numBitsInLong)
		{
			if(len1>len2)//first is longer
			{ 
			
				return 1;
			}
			return -1; //second is longer
		}	
		
		len1-=numBitsInLong;
		len2-=numBitsInLong;

		start1+=numBitsInLong;
		start2+=numBitsInLong;

		if(iteration==2)
		{
			bitPrefix1=(*first).bitsPrefix2;
			bitPrefix2=(*second).bitsPrefix2;
			iteration++;
		}
		else
		{
			bitPrefix1=getBitsPrefix(file1,start1);
			bitPrefix2=getBitsPrefix(file2,start2);
		}
	}

	if(bitPrefix1>bitPrefix2)
	{
	
		return 1;
	}

	if(bitPrefix1<bitPrefix2)
		return -1;	
	
	//absolutely equal suffixes
	return 0;
}



Suffix *getNextStartPos(int bufferNumber)
{
	
	if(buffersLengths[bufferNumber]<1)
		return NULL;

	if(buffersPointers[bufferNumber]<buffersLengths[bufferNumber])
	{		
		return &(buffers[bufferNumber][buffersPointers[bufferNumber]++]);
	}
	else
	{
		refillBuffer(bufferNumber);			
		return getNextStartPos(bufferNumber);
	}
}

void addNewSuffix(Suffix *suffix, int chunkID)
{
	//IMPORTANT - suffixStart in the doubled sequence of 0 and 1
	HeapNode newNode;
	
	int suffixStart=((*suffix).startPos)<<1;  //multiple of 2^1

	int parent, child;
	if (counter == maxHeapCapacity) 
	{
		printf( "ERROR: heap is full\n");
		exit(1);
	}

	newNode.startPos=suffixStart;	
	newNode.fileNumber=chunkID;
	newNode.bitsPrefix1=(*suffix).bitsPrefix1;
	newNode.bitsPrefix2=(*suffix).bitsPrefix2;
  
	child = counter++; /* the next available slot in the heap */
	while (child > 0) 
	{
		parent = (child - 1) / 2;
		if (compare(&heap[parent],&newNode)>0) 
		{
			heap[child] = heap[parent];
			child = parent;
		} 
		else 
		{
			break;
		}
	}
	heap[child]= newNode;	
}


void initializeMerge()
{
	int newID;
	int i;
	
//output buffer will contain output_buffer_max leaves, thus it requires twice as many tree nodes 
//output_buffer_max<<1=output_buffer_max*2 (+2 for safety - to avoid overflow)	
	outputBuffer=(STNode*) calloc (output_buffer_max, sizeof(STNode));  
//cannot be more internal nodes in the path from root than there are leaves in the current subtree	
	lastPath=(PathElement *)calloc(output_buffer_max,sizeof(PathElement));  
	pathCounter=0;
	
	outputRoot=getNewOutputTreeNode(&newID);
	heap=(HeapNode*) calloc (numberOfChunks, sizeof(HeapNode));
	maxHeapCapacity=numberOfChunks;

	lastPath[pathCounter].node=outputRoot;
	lastPath[pathCounter].length=0;

	pathCounter++;
	//add first element of each buffer -if there is at least one
	for(i=0;i<numberOfChunks;i++)
	{
		if(buffersLengths[i]>0)
		{
			Suffix *zeroSuffixStart=getNextStartPos(i);			
			addNewSuffix(zeroSuffixStart, i);
			
		}
	}	
	lastTransferred.fileNumber=-1;	
	prevLCP=-1;
}

HeapNode getSmallestHeapNode()
{
	HeapNode result, item;
	int child, parent;
	if (counter == 0) 
	{
		printf( "ERROR: heap is empty\n");
		exit(1);
	}
	result= heap[0];   /* to be returned */
	
	item = heap[--counter]; /* to be reinserted */ 
	parent = 0; 

	while ((child = (2 * parent) + 1) < counter) 
	{
		/* if there are two children, compare them */
		if (child + 1 < counter && (compare(&heap[child],&heap[child + 1])>0)) 
		{
			++child;
		}
		/* compare item with the larger */
		if (compare(&item, &heap[child])>0) 
		{
			heap[parent] = heap[child];
			parent = child;
		} 
		else 
		{
			break;
		}
	}
	heap[parent] = item;
	
	return result;
}

void restartOutputTree()
{	
	int written;
	int outputRootID;
	currentDivider[0].length=outputCounter;
	currentDivider[0].maxBitSequence=lastTransferred.bitsPrefix1;
	
	if(fwrite(currentDivider, sizeof (DividerElement), 1, dividersFile)!=1)
	{
		printf("could not write current divider element\n");
		exit(1);
	}	
	
	sprintf(currOutputFileName,"%s_%i", smallMergedFileNameprefix,(*currentDivider).fileNumber);
	
	//Open output file for writing merged suffixes
	if(!(outputFile=fopen(currOutputFileName,"wb")))
	{
		printf("Cannot open suffix array  %s for writing tree\n",currOutputFileName);
		exit(1);
	}

	written=fwrite(outputBuffer, sizeof (STNode), outputCounter, outputFile);
	if(written!=outputCounter)
	{
		printf("not all output tree nodes were written\n");
		exit(1);
	}	
	
	fclose(outputFile);		

	outputFileNumberCounter++;
	
	outputCounter=0;
	
	outputRoot=getNewOutputTreeNode(&outputRootID);	
	lastTransferred.fileNumber=-1;
	pathCounter=0;
	lastPath[pathCounter].node=outputRoot;
	lastPath[pathCounter].length=0;
	pathCounter++;
}

void moveUpPath(int lcp)
{
	if(lastPath[pathCounter-1].length>lcp)
	{
		if(pathCounter>=2)
		{
			if(lastPath[pathCounter-2].length<lcp) //in the middle of the edge between last and 1 before last
				return; 
			if(lastPath[pathCounter-2].length==lcp)
			{
				pathCounter--;
				return;
			}

			if(lastPath[pathCounter-2].length>lcp)
			{
				pathCounter--;
				moveUpPath(lcp);
			}
		}
		else
		{
			printf("logic error - moving up in the tree consisting of 1 level\n");
			exit(1);
		}
		
		
	}
}

void addToOutputTree(HeapNode next)
{
	short firstBit;
	int newID;
	STNode *outputnode, *prevLeaf,  *nextLeaf,  *currParent,  *newLeaf, *replacementNode, *existingNextLeaf;
	
	short firstbitafterlcp=0;
	int newLeafID=0;
	int lcpWithPrevSibling;
	int nextLeafID=0;
	int parentDepth=0;

	
	int suffixStart=(next).startPos;
	int fileNumber=next.fileNumber;
	int totalStringLength=totallengthsdoubled[fileNumber];
	
	unsigned int bitPrefix=getBitsPrefix(fileNumber,suffixStart);

	if(suffixStart==21366)
	{
		parentDepth=0;
	}
	//Case 0A. first time insertion into an empty tree
	if(lastTransferred.fileNumber==-1 || outputCounter==1)
	{
		//initialize divider
		
		currentDivider[0].fileNumber=outputFileNumberCounter;
		currentDivider[0].minBitSequence=bitPrefix;
		firstBit=getBit(& bitPrefix,0);
		
		outputnode=getNewOutputTreeNode(&newID);
		(*outputnode).startPos=(next).startPos;
		(*outputnode).fileNumber=fileNumber;
		(*outputnode).length=totalStringLength-(*outputnode).startPos;
		
		(*outputRoot).children[firstBit]=newID;
		
		lastPath[pathCounter].node=outputnode;
		lastPath[pathCounter].length=(*outputnode).length;

		pathCounter++;
		prevLCP=0;
		return;
	}
	


	
	lcpWithPrevSibling=getLCP(&lastTransferred,&next,&firstbitafterlcp);	
	
	//Case 0B. new child out of root
	if(lcpWithPrevSibling==0 )
	{		
		outputnode=getNewOutputTreeNode(&newID);
		(*outputnode).startPos=(next).startPos;
		(*outputnode).fileNumber=fileNumber;
		(*outputnode).length=totalStringLength-(next).startPos;
		if((*outputRoot).children[firstbitafterlcp]>0)
		{
			printf("Logic error. Child %d of s tree node already exists\n", firstbitafterlcp);
			exit(1);
		}
		(*outputRoot).children[firstbitafterlcp]=newID;	
		
		
		lastPath[0].node=outputRoot;
		lastPath[0].length=0;
		
		pathCounter=1;
		lastPath[pathCounter].node=outputnode;
		lastPath[pathCounter].length=(*outputnode).length;

		pathCounter++;
		prevLCP=0;
		return;
	}


	//if we need to split up on the border path, we first set the correct end of the path
	if(lcpWithPrevSibling<lastPath[pathCounter-1].length)
		moveUpPath(lcpWithPrevSibling);


	//after this operation, we have 2 cases:
	//Case 1. lcp=depth of the node at the end of lastPath

	//sub-case 1. lcp runs till the end of the current string -> create next leaf
	//sub-case 2. lcp runs till the end of the previous suffix and then continues into a child node
	//			-> this child node should not exist, otherwise lcp would be at least 1 bit bigger

	if(lcpWithPrevSibling==lastPath[pathCounter-1].length)
	{
		//sub-case 1
		if(lcpWithPrevSibling==totalStringLength-suffixStart) 
		{
			//checking correctness
			prevLeaf=lastPath[pathCounter-1].node;
			if(lastPath[pathCounter-1].length!=lcpWithPrevSibling)
			{
				printf("Logic error while adding next suffix with LCP equals remaining suffix length\n");
				exit(1);
			}
			
			//(*prevLeaf).emptyNode=1;
						
			nextLeaf=getNewOutputTreeNode(&nextLeafID);
			(*nextLeaf).fileNumber=fileNumber;
			(*nextLeaf).startPos=suffixStart;
			(*nextLeaf).length=-1;	//till the end of the string of file fileNumber		
			
			if((*prevLeaf).nextLeaf)
			{
				existingNextLeaf=&outputBuffer[(*prevLeaf).nextLeaf];
				while(existingNextLeaf->nextLeaf)
				{
					existingNextLeaf=&outputBuffer[(*existingNextLeaf).nextLeaf];
				}
				(*existingNextLeaf).nextLeaf=nextLeafID;
			}
			
			else
				(*prevLeaf).nextLeaf=nextLeafID;
			
			prevLCP=lcpWithPrevSibling;

			lastPath[pathCounter-1].node=prevLeaf; //last path has the same length
			return;
		}

		//sub-case 2 new lcp=prev lcp but not till the end of file fileNumber
		currParent=lastPath[pathCounter-1].node;
		
	//	if(isALeaf(currParent))
	//		(*currParent).emptyNode=1;

		if(pathCounter>1)
			parentDepth=lastPath[pathCounter-2].length;  //to add to the start pos of a new neaf
		else 
			parentDepth=0;
		if((*currParent).children[0]==0)
		{
			nextLeaf=getNewOutputTreeNode(&nextLeafID);
			(*nextLeaf).fileNumber=(*currParent).fileNumber;
			(*nextLeaf).startPos=(*currParent).startPos-parentDepth;
			(*nextLeaf).length=-1;

			if((*currParent).nextLeaf)
			{
				existingNextLeaf=&outputBuffer[(*currParent).nextLeaf];
				while(existingNextLeaf->nextLeaf)
				{
					existingNextLeaf=&outputBuffer[(*existingNextLeaf).nextLeaf];
				}
				(*existingNextLeaf).nextLeaf=nextLeafID;
			}
			else	
				(*currParent).nextLeaf=nextLeafID;
		}

		newLeaf=getNewOutputTreeNode(&newLeafID);
		(*newLeaf).startPos=suffixStart+lcpWithPrevSibling;
		(*newLeaf).fileNumber=fileNumber;
		(*newLeaf).length=totalStringLength-(*newLeaf).startPos;

		//additional check for correctness
		if((*currParent).children[firstbitafterlcp]>0)
		{
			//printLastPath(&lastTransferred, lcpWithPrevSibling);
			//tbr_bitprefix=getBitsPrefix(fileNumber,suffixStart+lcpWithPrevSibling-1);
			//printf("Now inserting startPos=%d file=%d length=%d with lcp=%d\n",
		//		suffixStart,fileNumber,totallengthsdoubled[fileNumber]-suffixStart,lcpWithPrevSibling);
			//printBitSequence(&tbr_bitprefix);
			//compare(&lastTransferred,&next);
			printf("Logical error. Trying to add a new child where the child already exists.\n");
			exit(1);
		}

		(*currParent).children[firstbitafterlcp]=newLeafID;

		//update path and prevLCP
		prevLCP=lcpWithPrevSibling;

		lastPath[pathCounter].node=newLeaf;
		lastPath[pathCounter].length=lcpWithPrevSibling+(*newLeaf).length;
		pathCounter++;

		return;
	}

	//Case 2. lcp<depth of the node at the last path - we split this node
	if(lcpWithPrevSibling<lastPath[pathCounter-1].length) //additional check
	{
		if(lcpWithPrevSibling<=lastPath[pathCounter-2].length) //additional check should be bigger
		{
			printf("logic error - adding suffix in the middle of an edge, but parent is below the child\n");
			exit(1);
		}

		newID=0;
		replacementNode=getNewOutputTreeNode(&newID);
		
		//independent if there is leaf or internal node at the end of the path, 
		//it will become the parent of a split
	
		currParent=lastPath[pathCounter-1].node;  	//copy content of last node in the lastPath to replacementNode
		*replacementNode=*currParent;		

		(*replacementNode).length=(lastPath[pathCounter-1].length-lcpWithPrevSibling);
		(*replacementNode).startPos=(*currParent).startPos+((*currParent).length-(*replacementNode).length);	

		//we use currParent as the split point on the edge
		//(*currParent).emptyNode=0;
		(*currParent).nextLeaf=0;		
			
		(*currParent).length=(*currParent).length-(*replacementNode).length;		
			
		lastPath[pathCounter-1].length-=(*replacementNode).length;
			
		(*currParent).children[0]=newID;  
			
		//if we split in the middle of the node, nexttolcp is 1 - child[1]

		//add new leaf
		newLeafID=0;	
		newLeaf=getNewOutputTreeNode(&newLeafID);
		(*newLeaf).startPos=suffixStart+lcpWithPrevSibling;
		(*newLeaf).fileNumber=fileNumber;
		(*newLeaf).length=totalStringLength-(*newLeaf).startPos;
		
		if(firstbitafterlcp!=1)
		{
			printf("Logic error - split in the middle of an edge - and the new child is a %d-child\n",firstbitafterlcp);
			exit(1);
		}
		(*currParent).children[1]=newLeafID;
		
		lastPath[pathCounter-1].node=currParent;

		lastPath[pathCounter].node=newLeaf;
		lastPath[pathCounter].length=lcpWithPrevSibling+(*newLeaf).length;
		pathCounter++;
	
		prevLCP=lcpWithPrevSibling;
		return;
	}

	//additional check
	printf("logic error - adding suffix to the tree - not any of the cases???\n");
	exit(1);
	
}


int  mergeSuffixArrays()
{
	Suffix *nextSuffix;
	
	while (counter>0 )
	{		
		HeapNode smallest=getSmallestHeapNode();
		
		addToOutputTree(smallest);
		
		nextSuffix=getNextStartPos(smallest.fileNumber);
		
		
		if(nextSuffix!=NULL)
		{
			addNewSuffix(nextSuffix, smallest.fileNumber);
			processingCounter++;
			if(processingCounter>2000000000)
			{
				printf("processed %d suffices\n",processingCounter);
				processingCounter=0;
			}
		}	
		
		lastTransferred=smallest;

		if(outputCounter>=(output_buffer_max-2))
		{
			printf("Writing subtree %d\n", outputFileNumberCounter);
			restartOutputTree();			
		}				
		
	}

	
	if(outputCounter>0)
	{
		printf("Writing subtree %d\n", outputFileNumberCounter);
		restartOutputTree();		
	}
	return 0;
}
